#### Appendix ####
# Table A1: Summary Stats (1) US #### 
library(tidyverse)

data <- read.csv("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv", stringsAsFactors = FALSE)
data <- data %>% filter(Q2.1 == "1,3")

## Education
data <- data %>% mutate(educ = case_when(Q11.3 == 1 ~ "less than high school",
                                          Q11.3 == 2 ~ "high school",
                                          Q11.3 == 3 ~ "some college",
                                          Q11.3 == 6 ~ "college",
                                          Q11.3 == 7 ~ "graduate degree"))
data %>% group_by(educ) %>%
  summarise(prop = n()/nrow(data)*100)


## Age
data %>% summarise(sd = sd(as.numeric(age), na.rm=T),
                   mean = mean(as.numeric(age), na.rm=T), 
                   range = paste(min(as.numeric(age), na.rm = T), "-", max(as.numeric(Q11.15_1), na.rm = T)),
                   n = sum(!is.na(age))) 
## Gender
table(data$Q11.6)/939

data %>% summarise(sd = sd(as.numeric(Q11.6), na.rm=T),
                   mean = mean(as.numeric(Q11.6), na.rm=T), 
                   range = paste(min(as.numeric(Q11.6), na.rm = T), "-", max(as.numeric(Q11.6), na.rm = T)),
                   n = sum(!is.na(Q11.6))) 

## Racial Group
table(data$Q11.13)/939
data <- data %>% mutate(white = case_when(Q11.13 == 1 ~ 1, 
                                          Q11.13 == 2 ~ 0,
                                          Q11.13 == 3 ~ 0,
                                          Q11.13 == 4 ~ 0,
                                          Q11.13 == 5 ~ 0,
                                          Q11.13 == 6 ~ 0))
data %>% group_by(ethnicity) %>%
  summarise(prop = n()/nrow(data)*100)

data %>% summarise(sd = sd(as.numeric(white), na.rm=T),
                   mean = mean(as.numeric(white), na.rm=T), 
                   range = paste(min(as.numeric(white), na.rm = T), "-", max(as.numeric(white), na.rm = T)),
                   n = sum(!is.na(white))) 

## Party ID
table(data$Q3.1)/939
data %>% group_by(Q3.1) %>% 
  summarise(prop = n()/nrow(data)* 100)
table(data$Q3.4)/939

data <- data %>% mutate(age_group = case_when(age < 30 ~  "18-29",
                                              age > 29 & age < 45 ~ "30-44",
                                              age > 44 & age < 65 ~ "45-64",
                                              age > 64 ~ "65 and over"))

data %>% group_by(age_group) %>% summarise(prop = n()/nrow(data)*100)



# Table A2: UK Sample Summary Stats ####
uk <- read.csv("UK_analysis/Working-Class+Women+Conjoint+-+UK_February+9,+2022_03.16.csv", stringsAsFactors = FALSE)
uk <- uk %>% filter(Q2.1 == "1,3")

uk %>% summarise(sd = sd(as.numeric(Q11.3), na.rm=T),
                   mean = mean(as.numeric(Q11.3), na.rm=T), 
                   range = paste(min(as.numeric(Q11.3), na.rm = T), "-", max(as.numeric(Q11.3), na.rm = T)),
                   n = sum(!is.na(Q11.3))) 
# age
uk$age <- uk$Q11.15
uk <- uk %>% mutate(age_group = case_when(age < 30 ~  "18-29",
                                          age > 29 & age < 45 ~ "30-44",
                                          age > 44 & age < 65 ~ "45-64",
                                          age > 64 ~ "65 and over"))

table(uk$age_group)/992

# education
table(as.numeric(uk$Q11.3))/992

# gender
table(as.numeric(uk$Q11.6))/992

# Race
uk <- uk %>% mutate(white = case_when(Q11.13 ==1 ~  1,
                                      Q11.13 ==11 ~  1,
                                      Q11.6!=1 & Q11.6!=11  ~0))
table(uk$white)/992

# party
table(uk$Q3.1)/992

## nationality 
table(uk$Q123)



# Figure A1: Democratic respondents AMCE ####
dem.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q5.3", "Q6.3", "Q7.3"),
                           #                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)
dem.resp <- dem.resp %>% drop_na(selected)

data <- dem.resp

data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                        Gender = fct_relevel(Gender, "Male"),
                        Working = factor(case_when(Former.Occupation == "Janitor" ~ "Working Class",
                                                   Former.Occupation == "Retail clerk" ~ "Working Class",
                                                   Former.Occupation == "Lawyer" ~ "White Collar",
                                                   Former.Occupation == "Business entrepreneur" ~ "White Collar")),
                        Working = fct_relevel(Working, "White Collar"))
model <- amce(selected ~  Age + Education + Working + Gender + Years.in.Politics, data = data, cluster=TRUE, respondent.id = "Response.ID")
model$estimates

labels <- c("Gender", "    Male", "    Female",
            "Occupational Background", "    White Collar","    Working Class",
            "Age", "   30", "   40", "   50", "   60", 
            "Education", "    High school graduate", "    University graduate", "    Post-graduate (Master or Doctoral degree)", 
            "Years in Politics", "    0", "    1", "    3", "    8"
)


estimates <- c(NA, 0, model$estimates[3][[1]][[1]], # gender
               NA, 0, model$estimates[4][[1]][[1]], # class
               NA, 0, model$estimates[1][[1]][[1]], model$estimates[1][[1]][[3]],model$estimates[1][[1]][[5]],
               NA, 0, model$estimates[2][[1]][[1]], model$estimates[2][[1]][[3]],
               NA, 0, model$estimates[5][[1]][[1]],  model$estimates[5][[1]][[3]],  model$estimates[5][[1]][[5]]
)

se <- c(NA, 0, model$estimates[3][[1]][[2]],
        NA, 0, model$estimates[4][[1]][[2]],
        NA, 0, model$estimates[1][[1]][[2]], model$estimates[1][[1]][[4]],model$estimates[1][[1]][[6]],
        NA, 0, model$estimates[2][[1]][[2]], model$estimates[2][[1]][[4]],
        NA, 0, model$estimates[5][[1]][[2]],  model$estimates[5][[1]][[4]],  model$estimates[5][[1]][[6]]
)

face <- c("bold", "plain", "plain",
          "bold", "plain", "plain",
          "bold", "plain", "plain", "plain", "plain",
          "bold", "plain", "plain", "plain",
          "bold", "plain", "plain", "plain", "plain")

type = c("gender", "gender", "gender",
         "occupation", "occupation", "occupation", 
         "age", "age","age","age", "age",
         "education", "education", "education", "education",
         "year", "year", "year", "year", "year")

support <- data.frame(labels = factor(labels, levels = rev(labels)), 
                      estimates = estimates,
                      se = se,
                      face = face,
                      type = type)

support <- support %>% mutate(ul = estimates + 1.96 * se,
                              ll = estimates - 1.96 * se,
                              ref = ifelse(estimates == 0 | is.na(estimates), "reference", "normal"),
                              dv = "Candidate Preference")

support

# AMCE plot:
#setwd("~/Dropbox/Gender Quota in Korea/_Pink-CollarWorker/Data")
#pdf("AMCE_Democrats.pdf")
ggplot(support, aes(x=labels, y=estimates, ymin=ll, ymax=ul, shape = type)) +
  labs(y = "Effect on Pr(Being Selected)") +
  geom_pointrange() + 
  theme_bw()  +
  coord_flip(ylim  = c(-0.3, 0.3)) + 
  scale_x_discrete(name="",labels=rev(labels)) +
  facet_wrap(vars(dv), nrow = 1) +
  geom_hline(yintercept = 0,colour="darkgrey",linetype=1) +
  theme(
    axis.text.y = element_text(colour = "black",
                               hjust = 0 , vjust=.5,
                               face = rev(face), size = 14),
    axis.ticks.y = element_blank(),
    strip.text = element_text(size = 14),
    legend.position = "none"
  ) + scale_shape_manual(values = c(18, 2, 20, 15, 7 ))


# Figure A2: Republican respondents AMCE ####
rep.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q8.3", "Q9.3", "Q10.3"),
                           respondentID = "ResponseId", new.format = TRUE)
rep.resp <- rep.resp %>% drop_na(selected)

data <- rep.resp

data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                        Gender = fct_relevel(Gender, "Male"),
                        Working = factor(case_when(Former.Occupation == "Janitor" ~ "Working Class",
                                                   Former.Occupation == "Retail clerk" ~ "Working Class",
                                                   Former.Occupation == "Lawyer" ~ "White Collar",
                                                   Former.Occupation == "Business entrepreneur" ~ "White Collar")),
                        Working = fct_relevel(Working, "White Collar"))
model <- amce(selected ~  Age + Education + Working + Gender + Years.in.Politics, data = data, cluster=TRUE, respondent.id = "Response.ID")
model$estimates

labels <- c("Gender", "    Male", "    Female",
            "Occupational Background", "    White Collar","    Working Class",
            "Age", "   30", "   40", "   50", "   60", 
            "Education", "    High school graduate", "    University graduate", "    Post-graduate (Master or Doctoral degree)", 
            "Years in Politics", "    0", "    1", "    3", "    8"
)


estimates <- c(NA, 0, model$estimates[3][[1]][[1]], # gender
               NA, 0, model$estimates[4][[1]][[1]], # class
               NA, 0, model$estimates[1][[1]][[1]], model$estimates[1][[1]][[3]],model$estimates[1][[1]][[5]],
               NA, 0, model$estimates[2][[1]][[1]], model$estimates[2][[1]][[3]],
               NA, 0, model$estimates[5][[1]][[1]],  model$estimates[5][[1]][[3]],  model$estimates[5][[1]][[5]]
)

se <- c(NA, 0, model$estimates[3][[1]][[2]],
        NA, 0, model$estimates[4][[1]][[2]],
        NA, 0, model$estimates[1][[1]][[2]], model$estimates[1][[1]][[4]],model$estimates[1][[1]][[6]],
        NA, 0, model$estimates[2][[1]][[2]], model$estimates[2][[1]][[4]],
        NA, 0, model$estimates[5][[1]][[2]],  model$estimates[5][[1]][[4]],  model$estimates[5][[1]][[6]]
)

face <- c("bold", "plain", "plain",
          "bold", "plain", "plain",
          "bold", "plain", "plain", "plain", "plain",
          "bold", "plain", "plain", "plain",
          "bold", "plain", "plain", "plain", "plain")

type = c("gender", "gender", "gender",
         "occupation", "occupation", "occupation", 
         "age", "age","age","age", "age",
         "education", "education", "education", "education",
         "year", "year", "year", "year", "year")

support <- data.frame(labels = factor(labels, levels = rev(labels)), 
                      estimates = estimates,
                      se = se,
                      face = face,
                      type = type)

support <- support %>% mutate(ul = estimates + 1.96 * se,
                              ll = estimates - 1.96 * se,
                              ref = ifelse(estimates == 0 | is.na(estimates), "reference", "normal"),
                              dv = "Candidate Preference")

support

# AMCE plot:
ggplot(support, aes(x=labels, y=estimates, ymin=ll, ymax=ul, shape = type)) +
  labs(y = "Effect on Pr(Being Selected)") +
  geom_pointrange() + 
  theme_bw()  +
  coord_flip(ylim  = c(-0.3, 0.3)) + 
  scale_x_discrete(name="",labels=rev(labels)) +
  facet_wrap(vars(dv), nrow = 1) +
  geom_hline(yintercept = 0,colour="darkgrey",linetype=1) +
  theme(
    axis.text.y = element_text(colour = "black",
                               hjust = 0 , vjust=.5,
                               face = rev(face), size = 14),
    axis.ticks.y = element_blank(),
    strip.text = element_text(size = 14),
    legend.position = "none"
  ) + scale_shape_manual(values = c(18, 2, 20, 15, 7 ))


# Table A3 & A4: Diagnostic Check ####
library(foreign)
library(readstata13)
library(tidyverse)
library(cowplot)
library(cjoint);library(FindIt)

#### Candidate Support 
setwd("~/Dropbox/Gender Quota in Korea/_Pink-CollarWorker/Data")

dem.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q5.3", "Q6.3", "Q7.3"),
                                                    covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)
dem.resp <- dem.resp %>% drop_na(selected)
rep.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q8.3", "Q9.3", "Q10.3"),
                                                 covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)
rep.resp <- rep.resp %>% drop_na(selected)


data <- rbind(dem.resp, rep.resp)

data <- data %>% drop_na(selected)

data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                         Education = fct_relevel(Education, "High school graduate"),
                        Gender = fct_relevel(Gender, "Male"),
                        Working = factor(case_when(Former.Occupation == "Janitor" ~ "Working Class",
                                                   Former.Occupation == "Retail clerk" ~ "Working Class",
                                                   Former.Occupation == "Lawyer" ~ "White Collar",
                                                   Former.Occupation == "Business entrepreneur" ~ "White Collar")),
                        Working = fct_relevel(Working, "White Collar"))

mod1.1 <- lm(selected ~ Working* task, data)
mod1.2 <- lm(selected ~ Gender* task, data)

mod1.3 <- lm(selected ~ Working * profile, data)
mod1.4 <- lm(selected ~ Gender * profile, data)

## Competency
dem.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q5.7", "Q6.7", "Q7.7"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

rep.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q8.7", "Q9.7", "Q10.7"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

data <- rbind(dem.resp, rep.resp)
data <- data %>% drop_na(selected)

data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                        Gender = fct_relevel(Gender, "Male"),
                        Working = factor(case_when(Former.Occupation == "Janitor" ~ "Working Class",
                                                   Former.Occupation == "Retail clerk" ~ "Working Class",
                                                   Former.Occupation == "Lawyer" ~ "White Collar",
                                                   Former.Occupation == "Business entrepreneur" ~ "White Collar")),
                        Working = fct_relevel(Working, "White Collar"))


mod2.1 <- lm(selected ~ Working* task, data)
mod2.2 <- lm(selected ~ Gender* task, data)

mod2.3 <- lm(selected ~ Working * profile, data)
mod2.4 <- lm(selected ~ Gender * profile, data)

library(stargazer)
# Table A3: Carryover effects
stargazer(mod1.1, mod1.2, mod2.1, mod2.2)
# Table A4: Profile order effects
stargazer(mod1.3, mod1.4, mod2.3, mod2.4)


# Figure A5: Analysis by Occupation ####
# Competency
dem.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q5.7", "Q6.7", "Q7.7"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

rep.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q8.7", "Q9.7", "Q10.7"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

data <- rbind(dem.resp, rep.resp)
data <- data %>% drop_na(selected)
data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                        Gender = fct_relevel(Gender, "Male"),
                        Occupation = factor(case_when(Former.Occupation == "Janitor" ~ "Janitor",
                                                      Former.Occupation == "Retail clerk" ~ "Retail.Clerk",
                                                      Former.Occupation == "Lawyer" ~ "White-Collar",
                                                      Former.Occupation == "Business entrepreneur" ~ "White-Collar")),
                        Occupation = fct_relevel(Occupation, "White-Collar"))
data <- data %>% mutate(pairid = paste(respondent, task, sep="_"))

fit1 <- CausalANOVA(formula = selected ~  Age + Education + Occupation + Years.in.Politics + Gender , 
                    int2.formula = ~Occupation:Gender, data=data, pair.id = data$pairid, diff = TRUE,
                    cluster = data$Response.ID, nway=2)

cond.estimates<- ConditionalEffect(fit1, treat.fac="Occupation", cond.fac="Gender")$ConditionalEffects

labels <- c( "Janitor", "Retail Clerk","Janitor", "Retail Clerk")



estimates <- c(cond.estimates[[1]][2,1], cond.estimates[[1]][3,1],
               cond.estimates[[2]][2,1], cond.estimates[[2]][3,1])

ll <- c(cond.estimates[[1]][2,3], cond.estimates[[1]][3,3],
        cond.estimates[[2]][2,3],cond.estimates[[2]][3,3])

ul <- c(cond.estimates[[1]][2,4], cond.estimates[[1]][3,4],
        cond.estimates[[2]][2,4],cond.estimates[[2]][3,4])

condition <- c("Candidate Gender = Male", "Candidate Gender = Male", 
               "Candidate Gender = Female", "Candidate Gender = Female")
type = c("male", "male", 
         "female", "female")

issue <- data.frame(labels = factor(labels), 
                    estimates = estimates,
                    ll=ll,
                    ul=ul,
                    type=type, 
                    condition=condition)
issue <- issue %>% mutate(dv = "Issue")

p1 <- ggplot(issue, aes(x=labels, y=estimates, ymin=ll, ymax=ul, shape=type)) +
  scale_shape_manual(values = c(19,1)) +
  labs(y = "Effect on Pr(Being Selected)") +
  geom_pointrange() + 
  theme_bw()  +
  coord_flip(ylim = c(-0.2, 0.2)) + 
  scale_x_discrete(name="",labels=labels) + ylim(c(-0.15, 0.1)) + 
  facet_wrap(vars(condition), nrow = 2) +
  geom_hline(yintercept = 0,colour="darkgrey",linetype=1) +
  theme(
    axis.text.y = element_text(colour = "black",
                               hjust = 0 , vjust=.5,
                               size = 14),
    axis.ticks.y = element_blank(),
    strip.text = element_text(size = 14),
    #axis.ticks.x = element_blank(),
    # axis.title.y = element_text(size = 7,angle=90,
    #                             vjust=.01,hjust=.1),
    legend.position = "none"
  )   + ggtitle("DV: Competency")
p1

# candidate support
dem.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q5.3", "Q6.3", "Q7.3"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

rep.resp <- read.qualtrics("Working-Class+Women+Conjoint_October+13,+2021_13.12.csv",
                           responses = c("Q8.3", "Q9.3", "Q10.3"),
                           covariates = c("Q11.6", "Q11.8", "Pink.Collar", "Working.Class"),
                           respondentID = "ResponseId", new.format = TRUE)

data <- rbind(dem.resp, rep.resp)
data <- data %>% drop_na(selected)
data <- data %>% mutate(Education = factor(Education, levels = c("High school graduate", "College graduate", "Post-graduate (Master or Doctoral degree)")),
                        Gender = fct_relevel(Gender, "Male"),
                        Occupation = factor(case_when(Former.Occupation == "Janitor" ~ "Janitor",
                                                      Former.Occupation == "Retail clerk" ~ "Retail.Clerk",
                                                      Former.Occupation == "Lawyer" ~ "White-Collar",
                                                      Former.Occupation == "Business entrepreneur" ~ "White-Collar")),
                        Occupation = fct_relevel(Occupation, "White-Collar"))
data <- data %>% mutate(pairid = paste(respondent, task, sep="_"))

fit2 <- CausalANOVA(formula = selected ~  Age + Education + Occupation + Years.in.Politics + Gender , 
                    int2.formula = ~Occupation:Gender, data=data, pair.id = data$pairid, diff = TRUE,
                    cluster = data$Response.ID, nway=2)

cond.estimates<- ConditionalEffect(fit2, treat.fac="Occupation", cond.fac="Gender")$ConditionalEffects
cond.estimates
labels <- c( "Janitor", "Retail Clerk","Janitor", "Retail Clerk")



estimates <- c(cond.estimates[[1]][2,1], cond.estimates[[1]][3,1],
               cond.estimates[[2]][2,1], cond.estimates[[2]][3,1])

ll <- c(cond.estimates[[1]][2,3], cond.estimates[[1]][3,3],
        cond.estimates[[2]][2,3],cond.estimates[[2]][3,3])

ul <- c(cond.estimates[[1]][2,4], cond.estimates[[1]][3,4],
        cond.estimates[[2]][2,4],cond.estimates[[2]][3,4])

condition <- c("Candidate Gender = Male", "Candidate Gender = Male", 
               "Candidate Gender = Female", "Candidate Gender = Female")
type = c("male", "male", 
         "female", "female")

support <- data.frame(labels = factor(labels), 
                      estimates = estimates,
                      ll=ll,
                      ul=ul,
                      type=type, 
                      condition=condition)
support <- issue %>% mutate(dv = "support")

p2 <- ggplot(support, aes(x=labels, y=estimates, ymin=ll, ymax=ul, shape=type)) +
  scale_shape_manual(values = c(19,1)) +
  labs(y = "Effect on Pr(Being Selected)") +
  geom_pointrange() + 
  theme_bw()  +
  coord_flip(ylim = c(-0.2, 0.2)) + 
  scale_x_discrete(name="",labels=labels) + ylim(c(-0.15, 0.1)) + 
  facet_wrap(vars(condition), nrow = 2) +
  geom_hline(yintercept = 0,colour="darkgrey",linetype=1) +
  theme(
    axis.text.y = element_text(colour = "black",
                               hjust = 0 , vjust=.5,
                               size = 14),
    axis.ticks.y = element_blank(),
    strip.text = element_text(size = 14),
    legend.position = "none"
  )   + ggtitle("DV: Candidate Support")
p2

ggarrange(p1, p2, ncol=2)

